* Start from an empty session: drop data, programs, stored results and matrices
clear all
clear matrix

* Session settings: raise the variable limit and disable output paging
set maxvar 15000
set more off


**********************************************************************************************************************************************
**********************************************************************************************************************************************
**** IGE VS RANK OVER TIME BY SURVEY 
**********************************************************************************************************************************************
**********************************************************************************************************************************************

	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear

* Restrict to the baseline sample
    keep if baseline_sample==1
    drop if data=="nsba" //exclude surveys whose respondents are only of one race

* Don't count survey x decade cells with fewer observations than the threshold
* stored in local min_obs.
* NOTE(review): this comment used to say 150 while the code has always used 200;
* the code's value is kept here - confirm which threshold is intended.
    local min_obs 200

    * baseline_sample equals 1 on every remaining row, so its total within a
    * cell is the number of observations in that cell
    bysort decade data: egen N_bydecade_bydata = total(baseline_sample)

    * List the cells that fall below the threshold, then blank out their decade
    * so they are skipped by the decade loops and dropped before plotting
    tab data decade if N_bydecade_bydata<`min_obs'
    replace decade=. if N_bydecade_bydata<`min_obs'

* Run regressions
* Placeholders for the slope estimate and its +/- 1.96 standard-error bounds.
* Suffix 1 holds the IGE (log-log) specification, suffix 2 the rank-rank one.
    foreach k of numlist 1 2 {
        gen coeff_`k'=.
        gen est_lb_`k' =.
        gen est_ub_`k' =.
    }

* Loop over data sources; within each source run every specification over
* every non-missing birth decade observed for that source
    levelsof data, local(sources) clean
    foreach src of local sources {

        levelsof decade if data=="`src'", local(decades)

        local spec = 0
        foreach stub in log rank {

            * spec = 1 for the log (IGE) variables, 2 for the rank variables
            local ++spec
            local yvar `stub'_son_baseline
            local xvar `stub'_father_baseline

            foreach dec of local decades {

                * Rows belonging to the current survey x decade cell
                local cell decade==`dec' & data=="`src'"

                quietly reg `yvar' `xvar' if `cell' [pw=wgt_sex_race], robust

                * Write the slope and its bounds onto every row of the cell
                replace coeff_`spec' = _b[`xvar'] if `cell'
                replace est_ub_`spec' = _b[`xvar']+1.96*_se[`xvar'] if `cell'
                replace est_lb_`spec' = _b[`xvar']-1.96*_se[`xvar'] if `cell'
            }
        }
    }
        
    * Reduce the data to one row per birth decade x survey. The estimate
    * variables are constant within a cell, so any row of the cell can be kept.
    keep decade data est_* coeff*
    drop if decade==.
    bysort decade data: keep if _n==1
    sort decade data

    * Position of each survey within its decade, and a copy of the original
    * decade value before it is shifted
    bysort decade: gen number = _n
    gen decade2= decade

    * Shift decade by the within-decade position minus 3, plus an extra
    * decade-specific offset: -2 (1920), -1 (1930), +1 (1940), +2 (1950-1970)
    replace decade = number + decade - 3 + cond(decade2==1920, -2, cond(decade2==1930, -1, cond(decade2==1940, 1, cond(inlist(decade2, 1950, 1960, 1970), 2, 0))))

* IGE and Rank figures
* One connected-line figure per specification, one series per survey, plotted
* against the unshifted birth decade (decade2). The estimate variable is named
* explicitly (coeff_1 for IGE, coeff_2 for rank) rather than through the
* abbreviation coeff, so the figures no longer depend on variable abbreviation
* being enabled or on a preserve/keep/restore step to make it unambiguous.
    local spec_1 "IGE"
    local spec_2 "Rank"
    local yticks_1 "0(.2)1"
    local yticks_2 "0.1(.1)0.45"

    forvalues k = 1/2 {

        * Label, y-axis ticks and output file name for this specification
        local spec "`spec_`k''"
        local yticks "`yticks_`k''"

        * NOTE(review): the ANES series uses lwidth(0.225) while every other
        * series uses 0.25; kept as in the original - confirm it is intended.
        twoway ///
            (connect coeff_`k' decade2 if data=="anes", mcolor(purple) m(square) lc(purple) lpat(solid) lwidth(0.225)) ///
            (connect coeff_`k' decade2 if data=="avtmh57", mcolor(orange*0.75) m(triangle) lc(orange*0.75) lpat(solid) lwidth(0.25)) ///
            (connect coeff_`k' decade2 if data=="avtmh76", mcolor(midgreen) m(diamond) lc(midgreen) lpat(solid) lwidth(0.25)) ///
            (connect coeff_`k' decade2 if data=="ocg62", mcolor(lavender) m(diamond) lc(lavender) lpat(solid) lwidth(0.25)) ///
            (connect coeff_`k' decade2 if data=="ocg73", mcolor(black) m(diamond_hollow) lc(black) lpat(solid) lwidth(0.25)) ///
            (connect coeff_`k' decade2 if data=="gss", mcolor(midblue*0.85) m(triangle) lc(midblue*0.85) lpat(solid) lwidth(0.25)) ///
            (connect coeff_`k' decade2 if data=="psid", mcolor(navy) m(circle) lc(navy) lpat(solid) lwidth(0.25)) ///
            (connect coeff_`k' decade2 if data=="nsfh", mcolor(dkgreen) m(circle) lc(dkgreen) lpat(solid) lwidth(0.25)) ///
            , ///
            xti(" " "Decade of respondent's birth") xscale(range(1905 1975)) ///
            legend(on row(3) size(small) symxsize(10) order(1 "ANES" 2 "AVTMH57" 3 "AVTMH76" 4 "OCG62" 5 "OCG73" 6 "GSS" 7 "PSID" 8 "NSFH" )) ///
            ylabel(`yticks', axis(1)) yti("`spec' coefficient" " ", axis(1)) ///
            xlabel(1910 "1910s" 1920 "1920s" 1930 "1930s" 1940 "1940s" 1950 "1950s" 1960 "1960s" 1970 "1970s", labsize(small) )

        graph export "$Mydirectory2/appendix_a/`spec'_bysurvey.pdf", as(pdf) replace
    }